home *** CD-ROM | disk | FTP | other *** search
- #
- # utilities.py
- # JunkMatcher
- #
- # Created by Benjamin Han on 2/1/05.
- # Copyright (c) 2005 Benjamin Han. All rights reserved.
- #
-
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
-
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
-
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
- #!/usr/bin/env python
-
- import sets
-
- from consts import *
-
- # for some reason cjkcodecs is only in effect when doing the following
- import codecs
- import cjkcodecs
- from cjkcodecs import aliases
-
- import objc
- from Foundation import * # for NSLog
-
# Maps commonly misspelled charset names (keys) to the codec names that
# should be used in their place (values).
charsetMispellings = {
    'big-5': 'big5',
}
-
# Strings whose encoding is None are treated as if they were UTF-8.
_DEFAULT_ENCODING = 'utf8'

# Codec error-handling schemes used when decoding and encoding text.
_ENCODE_ERROR_STRATEGY = 'ignore'
_DECODE_ERROR_STRATEGY = 'ignore'
-
-
class Lazy (object):
    """A descriptor class used as a decorator for lazy initialization.

    The decorated function is called the first time the attribute is read
    on an instance.  Its result is then stored on that instance under the
    function's own name, so for ordinary objects later reads find the
    stored value directly instead of recomputing it.
    """

    def __init__ (self, calculate_function):
        """Remember calculate_function; it is called with the instance and
        must return the attribute's value."""
        self._calculate = calculate_function

    def __get__ (self, obj, _=None):
        """Compute the value for obj, store it on obj and return it.

        When the attribute is looked up on the class itself (obj is None)
        there is no instance to compute for or to cache on, so the
        descriptor object is returned instead.
        """
        if obj is None:
            return self
        value = self._calculate(obj)
        # __name__ is available on functions in both Python 2 and Python 3,
        # whereas func_name exists only in Python 2.
        setattr(obj, self._calculate.__name__, value)
        return value
-
-
def decodeText (text, encoding = None):
    """Decode 'text' and return the decoded (Unicode) result.

    'encoding' names the codec to try first.  If it is None or empty, or if
    decoding with it fails (e.g. the codec name is unknown), the text is
    decoded with _DEFAULT_ENCODING instead.  Both attempts use
    _DECODE_ERROR_STRATEGY as the codec error-handling scheme.
    """
    if encoding:
        try:
            return text.decode(encoding, _DECODE_ERROR_STRATEGY)
        except Exception:
            # Best effort: fall through to the default encoding below.
            # Unlike a bare except, this does not swallow exceptions that
            # are not derived from Exception (KeyboardInterrupt and
            # SystemExit on Python 2.5 and later).
            pass

    return text.decode(_DEFAULT_ENCODING, _DECODE_ERROR_STRATEGY)
-
def decodeTextList (textList, encodingSet):
    """Decode and concatenate the pieces in textList; return a Unicode object.

    textList is a list of (text, encoding) tuples.  Each encoding is first
    looked up in charsetMispellings and replaced by the corrected name if one
    is listed.  Every non-empty encoding (after correction) is added to
    encodingSet, and each text is decoded with decodeText.
    """
    pieces = []
    for text, encoding in textList:
        # correct possible misspellings of the charset name
        corrected = charsetMispellings.get(encoding)
        if corrected:
            encoding = corrected
        if encoding:
            encodingSet.add(encoding)
        pieces.append(decodeText(text, encoding))

    # Join with a Unicode separator so that an empty textList also yields a
    # Unicode object rather than a byte string.
    return u''.join(pieces)
-
def encodeText (text, encoding = _DEFAULT_ENCODING):
    """Encode 'text' and return the encoded result.

    'encoding' names the codec to try first.  If it is None or empty, or if
    encoding with it fails (e.g. the codec name is unknown), the text is
    encoded with _DEFAULT_ENCODING instead.  Both attempts use
    _ENCODE_ERROR_STRATEGY as the codec error-handling scheme.
    """
    if encoding:
        try:
            return text.encode(encoding, _ENCODE_ERROR_STRATEGY)
        except Exception:
            # Best effort: fall through to the default encoding below.
            # Unlike a bare except, this does not swallow exceptions that
            # are not derived from Exception (KeyboardInterrupt and
            # SystemExit on Python 2.5 and later).
            pass

    return text.encode(_DEFAULT_ENCODING, _ENCODE_ERROR_STRATEGY)
-
def openFile (fn, mode = 'r', encoding = _DEFAULT_ENCODING):
    """Open the file 'fn' in 'mode' through codecs.open with the given encoding.

    Every user editable file (via GUI) should be opened with this function
    and a proper encoding (default: _DEFAULT_ENCODING).  Returns whatever
    codecs.open returns.
    """
    # 'strict' error handling: codec errors are not allowed to pass silently.
    return codecs.open(fn, mode, encoding = encoding, errors = 'strict')
-
def printException (msg, e):
    """Log 'msg' through NSLog, followed by str(e) when that is non-empty.

    The logged text is "<msg>: <str(e)>" if the exception has a non-empty
    string form, and just "<msg>" otherwise.
    """
    details = str(e)
    if details:
        line = u'%s: %s' % (msg, details)
    else:
        line = u'%s' % msg
    # NOTE(review): the finished message is passed as NSLog's only argument;
    # if NSLog treats it as a format string, a '%' in msg or in the
    # exception text could be misinterpreted - confirm against PyObjC.
    NSLog(line)
-